# Generates a C++ header that maps the addresses of the ncclKernel_* symbols
# found in PyTorch's libtorch_cuda.so to their (demangled) names.
#
# Usage: <this script> OUTPUT_HEADER
#   $1 - path of the header file to (over)write.
#
# Exits with status 1 (and writes nothing) when the output path is missing or
# libtorch_cuda.so cannot be located.

if [ "$#" -lt 1 ] || [ -z "$1" ]; then
  echo "usage: $0 <output header path>" >&2
  exit 1
fi

# Ask the installed PyTorch where its shared libraries live. Python's stderr is
# discarded; a failure is detected through the empty/missing path check below.
TORCH_CUDA_LIB=$(python -c 'from torch.utils import cpp_extension; print(f"{cpp_extension.TORCH_LIB_PATH}/libtorch_cuda.so")' 2>/dev/null)
if [ -z "$TORCH_CUDA_LIB" ] || [ ! -f "$TORCH_CUDA_LIB" ]; then
  # Without this guard `nm` would be invoked with no file operand and the
  # script would silently emit a header with a bogus or empty table.
  echo "error: cannot locate libtorch_cuda.so (is PyTorch importable by 'python'?)" >&2
  exit 1
fi

# Build the initializer-list entries:
#   - keep nm lines mentioning ncclKernel, dropping the __device_stub__ ones;
#   - demangle with c++filt;
#   - for every line containing "ncclKernel_", take field 1 as the address and
#     field 3 as the name, cut the name at the first "(" and emit
#     {0x<address>, "<name>"},
#     Each entry is prepended, so the result is in reverse input order.
syms=$(nm "$TORCH_CUDA_LIB" | grep ncclKernel | grep -v __device_stub__ | c++filt | awk 'BEGIN{a=""} /ncclKernel_/{fn=$3;sub(/\(.*/, "", fn);a = "{" "0x"$1 ", \"" fn "\"},\n" a} END{print a}')
if [ -z "$syms" ]; then
  # Still generate the header (with an empty map), but make the situation visible.
  echo "warning: no ncclKernel_ symbols found in $TORCH_CUDA_LIB" >&2
fi

# NOTE(review): the header below defines non-inline namespace-scope variables;
# including it from more than one translation unit would presumably produce
# duplicate definitions - confirm it is only included once.
# The heredoc delimiter is intentionally unquoted so that $syms is expanded.
cat << EOF > "$1"
#pragma once
// this file is auto generated, do not modify!

#include <cstddef>
#include <unordered_map>
#include <unordered_set>
#include <string>

namespace atorch {

std::unordered_set<const void*> fns_to_skip;
std::unordered_map<const void*, std::string> fns_to_name;
std::unordered_map<ptrdiff_t, std::string> addr_to_name{
$syms
};
}  // namespace atorch
EOF

# The generated file looks like this (example output):
#pragma once
#include <unordered_map>
#include <unordered_set>
#include <string>

#namespace atorch {
#std::unordered_set<const void*> fns_to_skip;
#std::unordered_map<const void*, std::string> fns_to_name;
#std::unordered_map<ptrdiff_t, std::string> addr_to_name{
#{0x000000000403b050, "ncclKernel_Reduce_NVLS_LL_Sum_half"},
#{0x000000000403ae10, "ncclKernel_Reduce_RING_LL_Sum_half"},
#{0x000000000403ad50, "ncclKernel_Reduce_TREE_LL_Sum_half"},
#{0x000000000403bbb0, "ncclKernel_Reduce_NVLS_LL_Sum_float"},
#{0x000000000403b970, "ncclKernel_Reduce_RING_LL_Sum_float"},
#{0x000000000403b8b0, "ncclKernel_Reduce_TREE_LL_Sum_float"},
#{0x000000000403c710, "ncclKernel_Reduce_NVLS_LL_Sum_double"},
#{0x000000000403e930, "ncclKernel_Reduce_NVLS_LL_Sum_int8_t"},
#{0x000000000403c4d0, "ncclKernel_Reduce_RING_LL_Sum_double"},
#{0x000000000403e6f0, "ncclKernel_Reduce_RING_LL_Sum_int8_t"},
#{0x000000000403c410, "ncclKernel_Reduce_TREE_LL_Sum_double"},
#{0x000000000403e630, "ncclKernel_Reduce_TREE_LL_Sum_int8_t"},
#{0x0000000004067940, "ncclKernel_AllReduce_NVLS_LL_Sum_half"},
#{0x0000000004067700, "ncclKernel_AllReduce_RING_LL_Sum_half"},
#{0x0000000004067640, "ncclKernel_AllReduce_TREE_LL_Sum_half"},
#{0x000000000403d270, "ncclKernel_Reduce_NVLS_LL_Sum_int32_t"},
#{0x000000000403ddd0, "ncclKernel_Reduce_NVLS_LL_Sum_int64_t"},
#{0x0000000004040b50, "ncclKernel_Reduce_NVLS_LL_Sum_uint8_t"},
#{0x000000000403d030, "ncclKernel_Reduce_RING_LL_Sum_int32_t"},
#{0x000000000403db90, "ncclKernel_Reduce_RING_LL_Sum_int64_t"},
#{0x0000000004040910, "ncclKernel_Reduce_RING_LL_Sum_uint8_t"},
#{0x000000000403cf70, "ncclKernel_Reduce_TREE_LL_Sum_int32_t"},
#{0x000000000403dad0, "ncclKernel_Reduce_TREE_LL_Sum_int64_t"},
#{0x0000000004040850, "ncclKernel_Reduce_TREE_LL_Sum_uint8_t"},
#{0x00000000040684a0, "ncclKernel_AllReduce_NVLS_LL_Sum_float"},
#{0x0000000004068260, "ncclKernel_AllReduce_RING_LL_Sum_float"},
#{0x00000000040681a0, "ncclKernel_AllReduce_TREE_LL_Sum_float"},
#{0x000000000403f490, "ncclKernel_Reduce_NVLS_LL_Sum_uint32_t"},
#{0x000000000403fff0, "ncclKernel_Reduce_NVLS_LL_Sum_uint64_t"},
#{0x000000000403f250, "ncclKernel_Reduce_RING_LL_Sum_uint32_t"},
#{0x000000000403fdb0, "ncclKernel_Reduce_RING_LL_Sum_uint64_t"},
#{0x000000000403f190, "ncclKernel_Reduce_TREE_LL_Sum_uint32_t"},
#{0x000000000403fcf0, "ncclKernel_Reduce_TREE_LL_Sum_uint64_t"},
#{0x0000000004066280, "ncclKernel_AllGather_NVLS_LL_Sum_int8_t"},
#{0x0000000004066040, "ncclKernel_AllGather_RING_LL_Sum_int8_t"},
#{0x0000000004065f80, "ncclKernel_AllGather_TREE_LL_Sum_int8_t"},
#{0x0000000004069000, "ncclKernel_AllReduce_NVLS_LL_Sum_double"},
#{0x000000000406b220, "ncclKernel_AllReduce_NVLS_LL_Sum_int8_t"},
#{0x0000000004068dc0, "ncclKernel_AllReduce_RING_LL_Sum_double"},
#{0x000000000406afe0, "ncclKernel_AllReduce_RING_LL_Sum_int8_t"},
#{0x0000000004068d00, "ncclKernel_AllReduce_TREE_LL_Sum_double"},
#{0x000000000406af20, "ncclKernel_AllReduce_TREE_LL_Sum_int8_t"},
#{0x0000000004072830, "ncclKernel_Broadcast_NVLS_LL_Sum_int8_t"},
#{0x00000000040725f0, "ncclKernel_Broadcast_RING_LL_Sum_int8_t"},
#{0x0000000004072530, "ncclKernel_Broadcast_TREE_LL_Sum_int8_t"},
#{0x000000000403ac90, "ncclKernel_Reduce_NVLS_TREE_LL_Sum_half"},
#{0x0000000004069b60, "ncclKernel_AllReduce_NVLS_LL_Sum_int32_t"},
#{0x000000000406a6c0, "ncclKernel_AllReduce_NVLS_LL_Sum_int64_t"},
#{0x000000000406d440, "ncclKernel_AllReduce_NVLS_LL_Sum_uint8_t"},
#{0x0000000004069920, "ncclKernel_AllReduce_RING_LL_Sum_int32_t"},
#{0x000000000406a480, "ncclKernel_AllReduce_RING_LL_Sum_int64_t"},
#{0x000000000406d200, "ncclKernel_AllReduce_RING_LL_Sum_uint8_t"},
#{0x0000000004069860, "ncclKernel_AllReduce_TREE_LL_Sum_int32_t"},
#{0x000000000406a3c0, "ncclKernel_AllReduce_TREE_LL_Sum_int64_t"},
#{0x000000000406d140, "ncclKernel_AllReduce_TREE_LL_Sum_uint8_t"},
#{0x000000000403b7f0, "ncclKernel_Reduce_NVLS_TREE_LL_Sum_float"},
#{0x000000000406bd80, "ncclKernel_AllReduce_NVLS_LL_Sum_uint32_t"},
#{0x000000000406c8e0, "ncclKernel_AllReduce_NVLS_LL_Sum_uint64_t"},
#{0x000000000406bb40, "ncclKernel_AllReduce_RING_LL_Sum_uint32_t"},
#{0x000000000406c6a0, "ncclKernel_AllReduce_RING_LL_Sum_uint64_t"},
#{0x000000000406ba80, "ncclKernel_AllReduce_TREE_LL_Sum_uint32_t"},
#{0x000000000406c5e0, "ncclKernel_AllReduce_TREE_LL_Sum_uint64_t"},
#{0x000000000403c350, "ncclKernel_Reduce_NVLS_TREE_LL_Sum_double"},
#{0x000000000403e570, "ncclKernel_Reduce_NVLS_TREE_LL_Sum_int8_t"},
#{0x0000000004033e90, "ncclKernel_ReduceScatter_NVLS_LL_Sum_half"},
#{0x0000000004033c50, "ncclKernel_ReduceScatter_RING_LL_Sum_half"},
#{0x0000000004033b90, "ncclKernel_ReduceScatter_TREE_LL_Sum_half"},
#{0x0000000004067580, "ncclKernel_AllReduce_NVLS_TREE_LL_Sum_half"},
#{0x000000000403ceb0, "ncclKernel_Reduce_NVLS_TREE_LL_Sum_int32_t"},
#{0x000000000403da10, "ncclKernel_Reduce_NVLS_TREE_LL_Sum_int64_t"},
#{0x0000000004040790, "ncclKernel_Reduce_NVLS_TREE_LL_Sum_uint8_t"},
#{0x00000000040349f0, "ncclKernel_ReduceScatter_NVLS_LL_Sum_float"},
#{0x00000000040347b0, "ncclKernel_ReduceScatter_RING_LL_Sum_float"},
#{0x00000000040346f0, "ncclKernel_ReduceScatter_TREE_LL_Sum_float"},
#{0x000000000404a2e0, "ncclKernel_SendRecv_RING_SIMPLE_Sum_int8_t"},
#{0x00000000040680e0, "ncclKernel_AllReduce_NVLS_TREE_LL_Sum_float"},
#{0x000000000403af90, "ncclKernel_Reduce_COLLNET_CHAIN_LL_Sum_half"},
#{0x000000000403a4f0, "ncclKernel_Reduce_NVLS_LL_Sum___nv_bfloat16"},
#{0x000000000403f0d0, "ncclKernel_Reduce_NVLS_TREE_LL_Sum_uint32_t"},
#{0x000000000403fc30, "ncclKernel_Reduce_NVLS_TREE_LL_Sum_uint64_t"},
#{0x000000000403a2b0, "ncclKernel_Reduce_RING_LL_Sum___nv_bfloat16"},
#{0x0000000004035550, "ncclKernel_ReduceScatter_NVLS_LL_Sum_double"},
#{0x0000000004037770, "ncclKernel_ReduceScatter_NVLS_LL_Sum_int8_t"},
#{0x0000000004035310, "ncclKernel_ReduceScatter_RING_LL_Sum_double"},
#{0x0000000004037530, "ncclKernel_ReduceScatter_RING_LL_Sum_int8_t"},
#{0x0000000004035250, "ncclKernel_ReduceScatter_TREE_LL_Sum_double"},
#{0x0000000004037470, "ncclKernel_ReduceScatter_TREE_LL_Sum_int8_t"},
#{0x000000000403a1f0, "ncclKernel_Reduce_TREE_LL_Sum___nv_bfloat16"},
#{0x0000000004065ec0, "ncclKernel_AllGather_NVLS_TREE_LL_Sum_int8_t"},
#{0x0000000004068c40, "ncclKernel_AllReduce_NVLS_TREE_LL_Sum_double"},
#{0x000000000406ae60, "ncclKernel_AllReduce_NVLS_TREE_LL_Sum_int8_t"},
#{0x0000000004072470, "ncclKernel_Broadcast_NVLS_TREE_LL_Sum_int8_t"},
#{0x000000000403baf0, "ncclKernel_Reduce_COLLNET_CHAIN_LL_Sum_float"},
#{0x000000000403aed0, "ncclKernel_Reduce_COLLNET_DIRECT_LL_Sum_half"},
#{0x00000000040360b0, "ncclKernel_ReduceScatter_NVLS_LL_Sum_int32_t"},
#{0x0000000004036c10, "ncclKernel_ReduceScatter_NVLS_LL_Sum_int64_t"},
#{0x0000000004039990, "ncclKernel_ReduceScatter_NVLS_LL_Sum_uint8_t"},
#{0x0000000004035e70, "ncclKernel_ReduceScatter_RING_LL_Sum_int32_t"},
#{0x00000000040369d0, "ncclKernel_ReduceScatter_RING_LL_Sum_int64_t"},
#{0x0000000004039750, "ncclKernel_ReduceScatter_RING_LL_Sum_uint8_t"},
#{0x0000000004035db0, "ncclKernel_ReduceScatter_TREE_LL_Sum_int32_t"},
#{0x0000000004036910, "ncclKernel_ReduceScatter_TREE_LL_Sum_int64_t"},
#{0x0000000004039690, "ncclKernel_ReduceScatter_TREE_LL_Sum_uint8_t"},
#{0x00000000040697a0, "ncclKernel_AllReduce_NVLS_TREE_LL_Sum_int32_t"},
#{0x000000000406a300, "ncclKernel_AllReduce_NVLS_TREE_LL_Sum_int64_t"},
#{0x000000000406d080, "ncclKernel_AllReduce_NVLS_TREE_LL_Sum_uint8_t"},
#{0x000000000403c650, "ncclKernel_Reduce_COLLNET_CHAIN_LL_Sum_double"},
#{0x000000000403e870, "ncclKernel_Reduce_COLLNET_CHAIN_LL_Sum_int8_t"},
#{0x000000000403ba30, "ncclKernel_Reduce_COLLNET_DIRECT_LL_Sum_float"},
#{0x00000000040382d0, "ncclKernel_ReduceScatter_NVLS_LL_Sum_uint32_t"},
#{0x0000000004038e30, "ncclKernel_ReduceScatter_NVLS_LL_Sum_uint64_t"},
#{0x0000000004038090, "ncclKernel_ReduceScatter_RING_LL_Sum_uint32_t"},
#{0x0000000004038bf0, "ncclKernel_ReduceScatter_RING_LL_Sum_uint64_t"},
#{0x0000000004037fd0, "ncclKernel_ReduceScatter_TREE_LL_Sum_uint32_t"},
#{0x0000000004038b30, "ncclKernel_ReduceScatter_TREE_LL_Sum_uint64_t"},
#{0x0000000004067880, "ncclKernel_AllReduce_COLLNET_CHAIN_LL_Sum_half"},
#{0x0000000004066de0, "ncclKernel_AllReduce_NVLS_LL_Sum___nv_bfloat16"},
#{0x000000000406b9c0, "ncclKernel_AllReduce_NVLS_TREE_LL_Sum_uint32_t"},
#{0x000000000406c520, "ncclKernel_AllReduce_NVLS_TREE_LL_Sum_uint64_t"},
#{0x0000000004066ba0, "ncclKernel_AllReduce_RING_LL_Sum___nv_bfloat16"},
#{0x0000000004066ae0, "ncclKernel_AllReduce_TREE_LL_Sum___nv_bfloat16"},
#{0x000000000403d1b0, "ncclKernel_Reduce_COLLNET_CHAIN_LL_Sum_int32_t"},
#{0x000000000403dd10, "ncclKernel_Reduce_COLLNET_CHAIN_LL_Sum_int64_t"},
#{0x0000000004040a90, "ncclKernel_Reduce_COLLNET_CHAIN_LL_Sum_uint8_t"},
#{0x000000000403c590, "ncclKernel_Reduce_COLLNET_DIRECT_LL_Sum_double"},
#{0x000000000403e7b0, "ncclKernel_Reduce_COLLNET_DIRECT_LL_Sum_int8_t"},
#{0x0000000004033ad0, "ncclKernel_ReduceScatter_NVLS_TREE_LL_Sum_half"},
#{0x00000000040683e0, "ncclKernel_AllReduce_COLLNET_CHAIN_LL_Sum_float"},
#{0x00000000040677c0, "ncclKernel_AllReduce_COLLNET_DIRECT_LL_Sum_half"},
#{0x000000000403f3d0, "ncclKernel_Reduce_COLLNET_CHAIN_LL_Sum_uint32_t"},
#{0x000000000403ff30, "ncclKernel_Reduce_COLLNET_CHAIN_LL_Sum_uint64_t"},
#{0x000000000403d0f0, "ncclKernel_Reduce_COLLNET_DIRECT_LL_Sum_int32_t"},
#{0x000000000403dc50, "ncclKernel_Reduce_COLLNET_DIRECT_LL_Sum_int64_t"},
#{0x00000000040409d0, "ncclKernel_Reduce_COLLNET_DIRECT_LL_Sum_uint8_t"},
#{0x0000000004034630, "ncclKernel_ReduceScatter_NVLS_TREE_LL_Sum_float"},
#{0x00000000040661c0, "ncclKernel_AllGather_COLLNET_CHAIN_LL_Sum_int8_t"},
#{0x0000000004068f40, "ncclKernel_AllReduce_COLLNET_CHAIN_LL_Sum_double"},
#{0x000000000406b160, "ncclKernel_AllReduce_COLLNET_CHAIN_LL_Sum_int8_t"},
#{0x0000000004068320, "ncclKernel_AllReduce_COLLNET_DIRECT_LL_Sum_float"},
#{0x0000000004072770, "ncclKernel_Broadcast_COLLNET_CHAIN_LL_Sum_int8_t"},
#{0x000000000403f310, "ncclKernel_Reduce_COLLNET_DIRECT_LL_Sum_uint32_t"},
#{0x000000000403fe70, "ncclKernel_Reduce_COLLNET_DIRECT_LL_Sum_uint64_t"},
#{0x000000000403a130, "ncclKernel_Reduce_NVLS_TREE_LL_Sum___nv_bfloat16"},
#{0x0000000004035190, "ncclKernel_ReduceScatter_NVLS_TREE_LL_Sum_double"},
#{0x00000000040373b0, "ncclKernel_ReduceScatter_NVLS_TREE_LL_Sum_int8_t"},
#{0x0000000004066100, "ncclKernel_AllGather_COLLNET_DIRECT_LL_Sum_int8_t"},
#{0x0000000004069aa0, "ncclKernel_AllReduce_COLLNET_CHAIN_LL_Sum_int32_t"},
#{0x000000000406a600, "ncclKernel_AllReduce_COLLNET_CHAIN_LL_Sum_int64_t"},
#{0x000000000406d380, "ncclKernel_AllReduce_COLLNET_CHAIN_LL_Sum_uint8_t"},
#{0x0000000004068e80, "ncclKernel_AllReduce_COLLNET_DIRECT_LL_Sum_double"},
#{0x000000000406b0a0, "ncclKernel_AllReduce_COLLNET_DIRECT_LL_Sum_int8_t"},
#{0x00000000040726b0, "ncclKernel_Broadcast_COLLNET_DIRECT_LL_Sum_int8_t"},
#{0x0000000004035cf0, "ncclKernel_ReduceScatter_NVLS_TREE_LL_Sum_int32_t"},
#{0x0000000004036850, "ncclKernel_ReduceScatter_NVLS_TREE_LL_Sum_int64_t"},
#{0x00000000040395d0, "ncclKernel_ReduceScatter_NVLS_TREE_LL_Sum_uint8_t"},
#{0x000000000406bcc0, "ncclKernel_AllReduce_COLLNET_CHAIN_LL_Sum_uint32_t"},
#{0x000000000406c820, "ncclKernel_AllReduce_COLLNET_CHAIN_LL_Sum_uint64_t"},
#{0x00000000040699e0, "ncclKernel_AllReduce_COLLNET_DIRECT_LL_Sum_int32_t"},
#{0x000000000406a540, "ncclKernel_AllReduce_COLLNET_DIRECT_LL_Sum_int64_t"},
#{0x000000000406d2c0, "ncclKernel_AllReduce_COLLNET_DIRECT_LL_Sum_uint8_t"},
#{0x0000000004033dd0, "ncclKernel_ReduceScatter_COLLNET_CHAIN_LL_Sum_half"},
#{0x0000000004033330, "ncclKernel_ReduceScatter_NVLS_LL_Sum___nv_bfloat16"},
#{0x0000000004037f10, "ncclKernel_ReduceScatter_NVLS_TREE_LL_Sum_uint32_t"},
#{0x0000000004038a70, "ncclKernel_ReduceScatter_NVLS_TREE_LL_Sum_uint64_t"},
#{0x00000000040330f0, "ncclKernel_ReduceScatter_RING_LL_Sum___nv_bfloat16"},
#{0x0000000004033030, "ncclKernel_ReduceScatter_TREE_LL_Sum___nv_bfloat16"},
#{0x000000000406bc00, "ncclKernel_AllReduce_COLLNET_DIRECT_LL_Sum_uint32_t"},
#{0x000000000406c760, "ncclKernel_AllReduce_COLLNET_DIRECT_LL_Sum_uint64_t"},
#{0x0000000004066a20, "ncclKernel_AllReduce_NVLS_TREE_LL_Sum___nv_bfloat16"},
#{0x0000000004034930, "ncclKernel_ReduceScatter_COLLNET_CHAIN_LL_Sum_float"},
#{0x0000000004033d10, "ncclKernel_ReduceScatter_COLLNET_DIRECT_LL_Sum_half"},
#{0x000000000403a430, "ncclKernel_Reduce_COLLNET_CHAIN_LL_Sum___nv_bfloat16"},
#{0x0000000004035490, "ncclKernel_ReduceScatter_COLLNET_CHAIN_LL_Sum_double"},
#{0x00000000040376b0, "ncclKernel_ReduceScatter_COLLNET_CHAIN_LL_Sum_int8_t"},
#{0x0000000004034870, "ncclKernel_ReduceScatter_COLLNET_DIRECT_LL_Sum_float"},
#{0x000000000403a370, "ncclKernel_Reduce_COLLNET_DIRECT_LL_Sum___nv_bfloat16"},
#{0x0000000004035ff0, "ncclKernel_ReduceScatter_COLLNET_CHAIN_LL_Sum_int32_t"},
#{0x0000000004036b50, "ncclKernel_ReduceScatter_COLLNET_CHAIN_LL_Sum_int64_t"},
#{0x00000000040398d0, "ncclKernel_ReduceScatter_COLLNET_CHAIN_LL_Sum_uint8_t"},
#{0x00000000040353d0, "ncclKernel_ReduceScatter_COLLNET_DIRECT_LL_Sum_double"},
#{0x00000000040375f0, "ncclKernel_ReduceScatter_COLLNET_DIRECT_LL_Sum_int8_t"},
#{0x0000000004038210, "ncclKernel_ReduceScatter_COLLNET_CHAIN_LL_Sum_uint32_t"},
#{0x0000000004038d70, "ncclKernel_ReduceScatter_COLLNET_CHAIN_LL_Sum_uint64_t"},
#{0x0000000004035f30, "ncclKernel_ReduceScatter_COLLNET_DIRECT_LL_Sum_int32_t"},
#{0x0000000004036a90, "ncclKernel_ReduceScatter_COLLNET_DIRECT_LL_Sum_int64_t"},
#{0x0000000004039810, "ncclKernel_ReduceScatter_COLLNET_DIRECT_LL_Sum_uint8_t"},
#{0x0000000004066d20, "ncclKernel_AllReduce_COLLNET_CHAIN_LL_Sum___nv_bfloat16"},
#{0x0000000004038150, "ncclKernel_ReduceScatter_COLLNET_DIRECT_LL_Sum_uint32_t"},
#{0x0000000004038cb0, "ncclKernel_ReduceScatter_COLLNET_DIRECT_LL_Sum_uint64_t"},
#{0x0000000004032f70, "ncclKernel_ReduceScatter_NVLS_TREE_LL_Sum___nv_bfloat16"},
#{0x0000000004066c60, "ncclKernel_AllReduce_COLLNET_DIRECT_LL_Sum___nv_bfloat16"},
#{0x0000000004033270, "ncclKernel_ReduceScatter_COLLNET_CHAIN_LL_Sum___nv_bfloat16"},
#{0x00000000040331b0, "ncclKernel_ReduceScatter_COLLNET_DIRECT_LL_Sum___nv_bfloat16"},
#};
#}  // namespace atorch
